require("lgpr")
# Loading required package: lgpr
# Attached lgpr 1.1.4, using rstan 2.21.2. Type ?lgpr to get started.
require("ggplot2")
# Loading required package: ggplot2
require("rstan")
# Loading required package: rstan
# Loading required package: StanHeaders
# rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
# For execution on a local, multicore CPU with excess RAM we recommend calling
# options(mc.cores = parallel::detectCores()).
# To avoid recompilation of unchanged Stan programs, we recommend calling
# rstan_options(auto_write = TRUE)
In this tutorial we simulate and analyse count data.
We first show how to generate binomial data
set.seed(131)
simData <- simulate_data(N = 12,
t_data = seq(12, 72, by = 12),
covariates = c( 2),
noise_type = "binomial",
relevances = c(0,1,1),
lengthscales = c(18,24,18),
N_trials = 100,
t_jitter = 1)
plot_sim(simData)
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
The noise level in the previous example was rather low. To generate more noisy observations, we can draw from the beta-binomial distribution. The overdispersion parameter \(\gamma \in (0,1)\) controls the noise level, so that \(\gamma \rightarrow 0\) would be equivalent to binomial noise (no overdispersion) and larger values mean more noise. Here we set gamma = 0.3.
set.seed(131)
simData <- simulate_data(N = 12,
t_data = seq(12, 72, by = 12),
covariates = c( 2),
noise_type = "bb",
gamma = 0.3,
relevances = c(0,1,1),
lengthscales = c(18,24,18),
N_trials = 100,
t_jitter = 1)
plot_sim(simData)
# - Dots are noisy observations of the response var.
# - Line is the true signal mapped through inv. link fun.
# (and multiplied by number of trials)
dat <- simData@data
str(dat)
# 'data.frame': 72 obs. of 4 variables:
# $ id : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
# $ age: num 11.2 23.5 37 48 59.5 ...
# $ z : Factor w/ 2 levels "1","2": 2 2 2 2 2 2 1 1 1 1 ...
# $ y : int 17 42 80 69 2 0 25 88 73 22 ...
To use beta-binomial observation model in our analysis, we use the likelihood argument of lgp(). Here we define a \(\text{Beta}(2,2)\) prior for the gamma parameter.
my_prior <- list(
alpha = student_t(20),
gamma = bet(2, 2)
)
We fit a model using the generated beta-binomial data.
fit <- lgp(y ~ age + age|id + age|z,
data = dat,
likelihood = "bb",
num_trials = 100,
prior = my_prior,
refresh = 300,
chains = 3,
cores = 3,
control = list(adapt_delta = 0.95),
iter = 3000)
# Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
# Running the chains for more iterations may help. See
# http://mc-stan.org/misc/warnings.html#tail-ess
print(fit)
# An object of class lgpfit. See ?lgpfit for more info.
# Inference for Stan model: lgp_latent.
# 3 chains, each with iter=3000; warmup=1500; thin=1;
# post-warmup draws per chain=1500, total post-warmup draws=4500.
#
# mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
# alpha[1] 0.95 0.01 0.39 0.39 0.67 0.88 1.16 1.92 2066 1.00
# alpha[2] 0.41 0.02 0.30 0.02 0.17 0.35 0.59 1.14 327 1.01
# alpha[3] 1.02 0.01 0.45 0.40 0.71 0.93 1.24 2.18 2073 1.00
# ell[1] 0.75 0.01 0.37 0.25 0.53 0.71 0.92 1.46 1442 1.00
# ell[2] 1.09 0.04 1.39 0.11 0.36 0.63 1.24 4.74 1516 1.00
# ell[3] 0.69 0.01 0.38 0.16 0.42 0.63 0.88 1.60 1264 1.00
# gamma[1] 0.26 0.00 0.06 0.11 0.23 0.27 0.30 0.36 386 1.01
#
# Samples were drawn using NUTS(diag_e) at Tue Aug 10 22:46:32 2021.
# For each parameter, n_eff is a crude measure of effective sample size,
# and Rhat is the potential scale reduction factor on split chains (at
# convergence, Rhat=1).
We see that the posterior mean of the gamma parameter is close to the value used in simulation (0.3). We visualize its posterior draws for each MCMC chain:
plot_draws(fit, type = "trace", regex_pars = "gamma")
relevances(fit)
# gp(age) gp(age)*zs(id) gp(age)*zs(z) noise
# 0.20713820 0.08643909 0.21994630 0.48647641
We subsample 50 draws from the posterior of each component \(f^{(j)}\), \(j=1,2,3\)
draws <- sample.int(1000, 50)
and visualize them
p <- pred(fit, draws = draws)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
as well as the sum \(f = f^{(1)} + f^{(2)} + f^{(3)}\)
plot_pred(fit, pred = p, alpha = 0.3)
t <- seq(2, 100, by = 2)
x_pred <- new_x(dat, t)
p <- pred(fit, x_pred, draws = draws)
# Computations will require creating and handling 600 x 72 matrices 200 times.
# Extrapolating function posterior draws...
# | 10%| 20%| 30%| 40%| 50%| 60%| 70%| 80%| 90%| 100%|
# ======================================================================
# c_hat_pred not given, using constant c_hat_pred = -0.0794862679828044
# Done.
plot_pred(fit, p, alpha = 0.3)
plot_components(fit, pred = p, alpha = 0.1, color_by = c(NA, NA, "z", "z"))
sessionInfo()
# R version 4.1.0 (2021-05-18)
# Platform: x86_64-apple-darwin17.0 (64-bit)
# Running under: macOS Big Sur 10.16
#
# Matrix products: default
# BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#
# locale:
# [1] C/UTF-8/C/C/C/C
#
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] rstan_2.21.2 StanHeaders_2.21.0-7 ggplot2_3.3.5
# [4] lgpr_1.1.4 rmarkdown_2.8
#
# loaded via a namespace (and not attached):
# [1] tidyselect_1.1.1 xfun_0.23 bslib_0.2.5.1 reshape2_1.4.4
# [5] purrr_0.3.4 V8_3.4.2 colorspace_2.0-2 vctrs_0.3.8
# [9] generics_0.1.0 htmltools_0.5.1.1 stats4_4.1.0 loo_2.4.1
# [13] yaml_2.2.1 utf8_1.2.2 rlang_0.4.11 pkgbuild_1.2.0
# [17] jquerylib_0.1.4 pillar_1.6.2 glue_1.4.2 withr_2.4.2
# [21] plyr_1.8.6 matrixStats_0.60.0 lifecycle_1.0.0 stringr_1.4.0
# [25] munsell_0.5.0 gtable_0.3.0 codetools_0.2-18 evaluate_0.14
# [29] labeling_0.4.2 inline_0.3.19 knitr_1.33 callr_3.7.0
# [33] ps_1.6.0 parallel_4.1.0 curl_4.3.2 bayesplot_1.8.1
# [37] fansi_0.5.0 highr_0.9 rstantools_2.1.1 Rcpp_1.0.7
# [41] scales_1.1.1 RcppParallel_5.1.4 jsonlite_1.7.2 farver_2.1.0
# [45] gridExtra_2.3 digest_0.6.27 stringi_1.7.3 processx_3.5.2
# [49] dplyr_1.0.7 grid_4.1.0 cli_3.0.1 tools_4.1.0
# [53] magrittr_2.0.1 sass_0.4.0 tibble_3.1.3 crayon_1.4.1
# [57] pkgconfig_2.0.3 MASS_7.3-54 ellipsis_0.3.2 prettyunits_1.1.1
# [61] ggridges_0.5.3 R6_2.5.0 compiler_4.1.0